{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b20c86e7-d35c-4fb0-b2ef-87c1a5ca3ec8",
   "metadata": {},
   "source": [
    "# Install packages and import modules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "030caa43-bf7b-4ce4-8fb8-f17b5f4c3565",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.schema import Document\n",
    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
    "from langchain.vectorstores import ElasticsearchStore\n",
    "from langchain.llms import OpenAI\n",
    "from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
    "from langchain.chains.query_constructor.base import AttributeInfo"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0b5ce1f7-afcd-42fd-adb7-7f28f7714d4d",
   "metadata": {},
   "source": [
    "# Create documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "dd3455b1-bc33-4de4-9bda-7e4928dfbc10",
   "metadata": {},
   "outputs": [],
   "source": [
    "docs = [\n",
    "    Document(\n",
    "        page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n",
    "        metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"science fiction\", \"director\": \"Steven Spielberg\", \"title\": \"Jurassic Park\"},\n",
    "    ),\n",
    "    Document(\n",
    "        page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n",
    "        metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"rating\": 8.2, \"title\": \"Inception\"},\n",
    "    ),\n",
    "    Document(\n",
    "        page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n",
    "        metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6, \"title\": \"Paprika\"},\n",
    "    ),\n",
    "    Document(\n",
    "        page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n",
    "        metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3, \"title\": \"Little Women\"},\n",
    "    ),\n",
    "    Document(\n",
    "        page_content=\"Toys come alive and have a blast doing so\",\n",
    "        metadata={\"year\": 1995, \"genre\": \"animated\", \"director\": \"John Lasseter\", \"rating\": 8.3, \"title\": \"Toy Story\"},\n",
    "    ),\n",
    "    Document(\n",
    "        page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n",
    "        metadata={\n",
    "            \"year\": 1979,\n",
    "            \"rating\": 9.9,\n",
    "            \"director\": \"Andrei Tarkovsky\",\n",
    "            \"genre\": \"science fiction\",\n",
    "            \"rating\": 9.9,\n",
    "            \"title\": \"Stalker\",\n",
    "        },\n",
    "    ),\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0af99850-b7b1-460d-bf9a-355bd3015e24",
   "metadata": {},
   "source": [
    "# Connect to Elasticsearch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "ebb694cb-a969-41c9-aed9-22dd9364f220",
   "metadata": {},
   "outputs": [],
   "source": [
    "from dotenv import load_dotenv\n",
    "from langchain.embeddings import OpenAIEmbeddings\n",
    "from langchain.vectorstores import ElasticKnnSearch\n",
    "from langchain.text_splitter import CharacterTextSplitter\n",
    "from urllib.request import urlopen\n",
    "import os, json\n",
    " \n",
    "load_dotenv()\n",
    " \n",
    "openai_api_key=os.getenv('OPENAI_API_KEY')\n",
    "elastic_user=os.getenv('ES_USER')\n",
    "elastic_password=os.getenv('ES_PASSWORD')\n",
    "elastic_endpoint=os.getenv(\"ES_ENDPOINT\")\n",
    "elastic_index_name='elasticsearch-self-query-demo'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "8a9c6db9-f9ac-44fd-b09d-95c0e064c9e4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'name': 'liuxgm.local', 'cluster_name': 'elasticsearch', 'cluster_uuid': 'SXGzrN4dSXW1t0pkWXGfjg', 'version': {'number': '8.11.0', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': 'd9ec3fa628c7b0ba3d25692e277ba26814820b20', 'build_date': '2023-11-04T10:04:57.184859352Z', 'build_snapshot': False, 'lucene_version': '9.8.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}\n"
     ]
    }
   ],
   "source": [
    "from elasticsearch import Elasticsearch\n",
    " \n",
    "url = f\"https://{elastic_user}:{elastic_password}@{elastic_endpoint}:9200\"\n",
    "connection = Elasticsearch(url, ca_certs = \"./http_ca.crt\", verify_certs = True)\n",
    " \n",
    "print(connection.info())\n",
    " \n",
    "embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)\n",
    "dims=1536\n",
    " \n",
    "\n",
    "es = ElasticsearchStore.from_documents( \n",
    "                            docs,\n",
    "                            embedding = embeddings, \n",
    "                            es_url = url, \n",
    "                            es_connection = connection,\n",
    "                            index_name = elastic_index_name, \n",
    "                            es_user = elastic_user,\n",
    "                            es_password = elastic_password)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3a828e36-13b0-4d8a-9394-2853df21de49",
   "metadata": {},
   "source": [
    "# Setup query retriever"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "99a3b482-7f27-4ada-890d-5318da04f0d0",
   "metadata": {},
   "source": [
    "## Add details about metadata fields"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "7b19320f-b25d-4402-8fd8-9c80c30136dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "metadata_field_info = [\n",
    "    AttributeInfo(\n",
    "        name=\"genre\",\n",
    "        description=\"The genre of the movie. Can be either 'science fiction' or 'animated'.\",\n",
    "        type=\"string or list[string]\",\n",
    "    ),\n",
    "    AttributeInfo(\n",
    "        name=\"year\",\n",
    "        description=\"The year the movie was released\",\n",
    "        type=\"integer\",\n",
    "    ),\n",
    "    AttributeInfo(\n",
    "        name=\"director\",\n",
    "        description=\"The name of the movie director\",\n",
    "        type=\"string\",\n",
    "    ),\n",
    "    AttributeInfo(\n",
    "        name=\"rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n",
    "    ),\n",
    "]\n",
    "\n",
    "document_content_description = \"Brief summary of a movie\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "5b3c465b-2943-4d04-8375-e442345a990a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set up openAI llm with sampling temperature 0\n",
    "llm = OpenAI(temperature=0, openai_api_key=openai_api_key)\n",
    "\n",
    "# instantiate retriever\n",
    "retriever = SelfQueryRetriever.from_llm(\n",
    "    llm, es, document_content_description, metadata_field_info, verbose=True\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d5641c90-4103-40f7-bf50-33d26d8ef427",
   "metadata": {},
   "source": [
    "# Question Answering with Self-Query Retriever"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "cff47644-2055-4373-b3ed-515f250a3f97",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\nInception (2010)'"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.schema.runnable import RunnableParallel, RunnablePassthrough\n",
    "from langchain.prompts import ChatPromptTemplate, PromptTemplate\n",
    "from langchain.schema import format_document\n",
    "\n",
    "LLM_CONTEXT_PROMPT = ChatPromptTemplate.from_template(\"\"\"\n",
    "Use the following context movies that matched the user question. Use the movies below only to answer the user's question.\n",
    "\n",
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
    "\n",
    "----\n",
    "{context}\n",
    "----\n",
    "Question: {question}\n",
    "Answer:\n",
    "\"\"\")\n",
    "\n",
    "DOCUMENT_PROMPT = PromptTemplate.from_template(\"\"\"\n",
    "---\n",
    "title: {title}                                                                                   \n",
    "year: {year}  \n",
    "director: {director}    \n",
    "---\n",
    "\"\"\")\n",
    "\n",
    "def _combine_documents(\n",
    "    docs, document_prompt=DOCUMENT_PROMPT, document_separator=\"\\n\\n\"\n",
    "):\n",
    "    doc_strings = [format_document(doc, document_prompt) for doc in docs]\n",
    "    return document_separator.join(doc_strings)\n",
    "\n",
    "\n",
    "_context = RunnableParallel(\n",
    "    context=retriever | _combine_documents,\n",
    "    question=RunnablePassthrough(),\n",
    ")\n",
    "\n",
    "chain = (_context | LLM_CONTEXT_PROMPT | llm)\n",
    "\n",
    "chain.invoke(\"What movies are about dreams and it was released after the year 2009 but before the year 2011?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a62597ec-b0cf-42c6-b577-5b98cdfcd857",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python_3.11",
   "language": "python",
   "name": "python_3.11"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
